home *** CD-ROM | disk | FTP | other *** search
- {*********************************************************}
- {* AASplChk *}
- {* Copyright (c) Julian M Bucknall 1999 *}
- {* All rights reserved. *}
- {*********************************************************}
- {* Algorithms Alfresco spell checker *}
- {*********************************************************}
-
- {Note: this unit is released as freeware. In other words, you are free
- to use this unit in your own applications, however I retain all
- copyright to the code. JMB}
-
- unit AASplChk;
-
- interface
-
- uses
- SysUtils,
- Classes;
-
- type
- TaaSoundex = string[4];
- PaaWordStr = ^TaaWordStr;
- TaaWordStr = string[255];
-
- type
- TaaSpellChecker = class
- private
- FWordTable : TList;
- FSoundexTable : TList;
- FWordCount : longint;
- protected
- procedure scAddWordEntry(const aWord : PaaWordStr);
- procedure scAddSoundexEntry(const aSoundex : TaaSoundex;
- aWord : PaaWordStr);
- procedure scBuildTables(const aWordListFile : string);
- procedure scFreeTables;
-
- public
- constructor Create(const aWordListFile : string);
- destructor Destroy; override;
-
- function WordExists(aWord : string) : boolean;
- procedure GetAlternatives(const aWord : string;
- aList : TStrings);
-
- property WordCount : longint read FWordCount;
- end;
-
- function AASoundex(const aWord : string) : TaaSoundex;
-
- implementation
-
- {===Soundex function=================================================}
- function AASoundex(const aWord : string) : TaaSoundex;
- const
- Encode : array ['A'..'Z'] of char =
- ('0', '1', '2', '3', '0', '1', '2', '/', '0', '2', '2',
- '4', '5', '5', '0', '1', '2', '6', '2', '3', '0', '1',
- '/', '2', '0', '2');
- var
- Ch : char;
- Code, OldCode : char;
- SxInx : integer;
- i : integer;
- begin
- Result := 'A000';
- if (aWord = '') then
- raise Exception.Create('Soundex: input string is empty');
- Ch := UpCase(aWord[1]);
- if not ('A' <= Ch) and (Ch <= 'Z') then
- raise Exception.Create('Soundex: unknown character in input string');
- Result[1] := Ch;
- Code := Encode[Ch];
- OldCode := Code;
- SxInx := 2;
- for i := 2 to length(aWord) do begin
- if (Code <> '/') then
- OldCode := Code;
- Ch := UpCase(aWord[i]);
- if not ('A' <= Ch) and (Ch <= 'Z') then
- Code := '0'
- else
- Code := Encode[Ch];
- if (Code <> OldCode) and (Code > '0') then begin
- Result[SxInx] := Code;
- inc(SxInx);
- if (SxInx > 4) then
- Break;
- end;
- end;
- end;
- {====================================================================}
-
-
- {====================================================================}
- function AllocWordString(const aWord : string) : PaaWordStr;
- begin
- GetMem(Result, succ(length(aWord)));
- Result^ := aWord;
- end;
- {--------}
- procedure FreeWordString(aWord : PaaWordStr);
- begin
- FreeMem(aWord, succ(length(aWord^)));
- end;
- {--------}
- function LowCase(Ch : char) : char;
- begin
- if ('A' <= Ch) and (Ch <= 'Z') then
- Result := char(ord(Ch) + (ord('a') - ord('A')))
- else
- Result := Ch;
- end;
- {====================================================================}
-
-
- {===Hash algorithms==================================================}
- function CalcELFHash(const S : string) : longint;
- var
- G : longint;
- i : integer;
- begin
- Result := 0;
- for i := 1 to length(S) do begin
- Result := (Result shl 4) + ord(S[i]);
- G := Result and longint($F0000000);
- if (G <> 0) then
- Result := Result xor (G shr 24);
- Result := Result and (not G);
- end;
- end;
- {--------}
- function CalcSoundexHash(const S : TaaSoundex) : integer;
- begin
- Result := ((ord(S[1]) - ord('A')) * 343) +
- ((ord(S[2]) - ord('0')) * 49) +
- ((ord(S[3]) - ord('0')) * 7) +
- (ord(S[4]) - ord('0'));
- end;
- {====================================================================}
-
-
- {===TaaSpellChecker==================================================}
- const
- WordTableSize = 10007; {a prime}
- SoundexTableSize = 26*7*7*7; {the exact number of Soundexes}
- {--------}
- constructor TaaSpellChecker.Create(const aWordListFile : string);
- begin
- inherited Create;
- FWordTable := TList.Create;
- FWordTable.Count := WordTableSize;
- FSoundexTable := TList.Create;
- FSoundexTable.Count := SoundexTableSize;
- scBuildTables(aWordListFile);
- end;
- {--------}
- destructor TaaSpellChecker.Destroy;
- begin
- scFreeTables;
- inherited Destroy;
- end;
- {--------}
- procedure TaaSpellChecker.GetAlternatives(const aWord : string;
- aList : TStrings);
- var
- Soundex : TaaSoundex;
- Hash : integer;
- i : integer;
- ThisList: TList;
- begin
- if not Assigned(aList) then
- Exit;
- aList.Clear;
- Soundex := AASoundex(aWord);
- Hash := CalcSoundexHash(Soundex);
- if (FSoundexTable[Hash] <> nil) then begin
- ThisList := TList(FSoundexTable[Hash]);
- for i := 0 to pred(ThisList.Count) do
- aList.Add(PaaWordStr(ThisList[i])^);
- end;
- end;
- {--------}
- procedure TaaSpellChecker.scAddSoundexEntry(const aSoundex : TaaSoundex;
- aWord : PaaWordStr);
- var
- Hash : integer;
- begin
- Hash := CalcSoundexHash(aSoundex);
- if FSoundexTable[Hash] = nil then
- FSoundexTable[Hash] := TList.Create;
- TList(FSoundexTable[Hash]).Add(aWord);
- end;
- {--------}
- procedure TaaSpellChecker.scAddWordEntry(const aWord : PaaWordStr);
- var
- Hash : longint;
- begin
- Hash := CalcELFHash(aWord^) mod WordTableSize;
- if FWordTable[Hash] = nil then
- FWordTable[Hash] := TList.Create;
- TList(FWordTable[Hash]).Add(aWord);
- end;
- {--------}
- procedure TaaSpellChecker.scBuildTables(const aWordListFile : string);
- var
- F : text;
- TheWord : TaaWordStr;
- OurWord : PaaWordStr;
- Soundex : TaaSoundex;
- i : integer;
- begin
- System.Assign(F, aWordListFile);
- System.Reset(F);
- try
- repeat
- readln(F, TheWord);
- for i := 1 to length(TheWord) do
- TheWord[i] := LowCase(TheWord[i]);
- OurWord := AllocWordString(TheWord);
- scAddWordEntry(OurWord);
- Soundex := AASoundex(TheWord);
- scAddSoundexEntry(Soundex, OurWord);
- until EOF(F);
- finally
- System.Close(F);
- end;
- end;
- {--------}
- procedure TaaSpellChecker.scFreeTables;
- var
- i, j : integer;
- ThisList : TList;
- begin
- if (FSoundexTable <> nil) then begin
- for i := 0 to pred(SoundexTableSize) do
- TList(FSoundexTable[i]).Free;
- FSoundexTable.Free;
- end;
- if (FWordTable <> nil) then begin
- for i := 0 to pred(WordTableSize) do begin
- ThisList := TList(FWordTable[i]);
- if Assigned(ThisList) then begin
- for j := 0 to pred(ThisList.Count) do
- FreeWordString(PaaWordStr(ThisList[j]));
- TList(FWordTable[i]).Free;
- end;
- end;
- FWordTable.Free;
- end;
- end;
- {--------}
- function TaaSpellChecker.WordExists(aWord : string) : boolean;
- var
- i : integer;
- Hash : longint;
- ThisList : TList;
- begin
- Result := false;
- for i := 1 to length(aWord) do
- aWord[i] := LowCase(aWord[i]);
- Hash := CalcELFHash(aWord) mod WordTableSize;
- if (FWordTable[Hash] = nil) then
- Exit;
- ThisList := TList(FWordTable[Hash]);
- for i := 0 to pred(ThisList.Count) do begin
- if (CompareText(PaaWordStr(ThisList[i])^, aWord) = 0) then begin
- Result := true;
- Exit;
- end;
- end;
- end;
- {====================================================================}
-
- end.
-